package com.atguigu.chapter07.state;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

/**
 * @Author lizhenchao@atguigu.cn
 * @Date 2021/6/16 11:30
 */
public class Flink09_Checkpoint {
    public static void main(String[] args) {
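        // Pin the local REST endpoint / web UI to port 20000 (the UI itself needs flink-runtime-web on the classpath)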
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 20000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setParallelism(2);
        
        // Checkpoint-related configuration
        env.enableCheckpointing(5000); // trigger a checkpoint every 5 seconds
        env.setStateBackend(new HashMapStateBackend()); // keep working state on the JVM heap
        env.getCheckpointConfig().setCheckpointStorage("hdfs://hadoop162:8020/ck1"); // persist checkpoint snapshots to HDFS
        
        // Advanced options:
        // Set the checkpointing mode to exactly-once (this is the default)
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        
        // Require at least 500 ms to pass between the end of one checkpoint and the start of the next
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(500);
        
        // A checkpoint must complete within one minute, otherwise it is discarded
        env.getCheckpointConfig().setCheckpointTimeout(60000);
        
        // Allow only one checkpoint to be in progress at a time
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        
        // Enable externalized checkpoints that are retained after the job is canceled
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
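        // A retained checkpoint can be resumed from later, e.g. bin/flink run -s hdfs://hadoop162:8020/ck1/<job-id>/chk-<n> ...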
        
        Properties sourceProps = new Properties();
        sourceProps.setProperty("bootstrap.servers", "hadoop162:9092,hadoop163:9092,hadoop164:9092");
        sourceProps.setProperty("group.id", "Flink09_Checkpoint2");
        // Applies only when the group has no committed offset yet
        sourceProps.setProperty("auto.offset.reset", "latest");
        
        Properties sinkProps = new Properties();
        sinkProps.setProperty("bootstrap.servers", "hadoop162:9092,hadoop163:9092,hadoop164:9092");
        
        // Kafka brokers cap transactions at 15 minutes by default (transaction.max.timeout.ms),
        // while Flink's Kafka producer defaults to a 1-hour transaction timeout, so lower it below the broker limit.
        sinkProps.setProperty("transaction.timeout.ms", String.valueOf(14 * 60 * 1000));
        
        // Word count over Kafka topic "w1"; the running sums live in keyed state covered by checkpoints
        SingleOutputStreamOperator<String> stream = env
            .addSource(new FlinkKafkaConsumer<>("w1", new SimpleStringSchema(), sourceProps))
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
                    for (String word : value.split(" ")) {
                        out.collect(Tuple2.of(word, 1L));
                    }
                }
            })
            .keyBy(f -> f.f0)
            .sum(1)
            .map(f -> f.f0 + "_" + f.f1);
        
        stream
            .addSink(new FlinkKafkaProducer<String>(
                "w2",
                new KafkaSerializationSchema<String>() {
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
                        return new ProducerRecord<>("w2", element.getBytes(StandardCharsets.UTF_8));
                    }
                },
                sinkProps,
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE));
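        // Note: for end-to-end exactly-once, consumers of topic "w2" must read with isolation.level=read_committed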
        
        // Uncomment to inject a failure and verify that the job restarts from the latest checkpoint
        // (requires importing org.apache.flink.streaming.api.functions.ProcessFunction)
        /*stream
            .process(new ProcessFunction<String, Object>() {
                @Override
                public void processElement(String value, Context ctx, Collector<Object> out) throws Exception {
                    throw new RuntimeException("deliberate test failure");
                }
            })
            .print();*/
        
        try {
            env.execute("test");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
